# Look at a number of different data sets to see situations where the
# regression equation
#   - might allow us to predict, or
#   - gives a narrow range in which to predict, or
#   - gives some idea (but not a great idea) of a range in which to
#     predict, or
#   - really does not help us to predict
# a dependent value, y, given an independent value, x.
#
# Every example follows the same steps: generate the data, look at it,
# draw a scatter plot, compute the correlation coefficient, fit the
# regression line, add that line to the plot, and mark in red the
# predicted y for x = 175.
#
# gnrnd4() comes from ../gnrnd4.R. Judging from how it is used below, it
# leaves the generated values in the global variables L1 (and, for the
# three-argument calls, L2).  NOTE(review): confirm against gnrnd4.R.
#
# The numeric results quoted in the comments (correlation coefficients
# and regression equations) are the ones recorded in the original notes;
# the code itself now takes every value from the data and fitted model
# instead of retyping rounded numbers.

# load the generator only if it is not already available in this session
if (!exists("gnrnd4", mode = "function")) {
  source("../gnrnd4.R")
}

###############################################
# Example 1: the correlation is so high that we could, for all
# practical purposes, say that we can use the regression equation to
# predict the dependent variable if we are given the independent
# variable.

# generate the values
# (the leading zero in the second key is kept as originally written;
#  R reads the literal as the decimal number 140080302)
gnrnd4(641292306, 0140080302, 18000070)
# look at the values
L1
L2
# make a scatter plot of the values
plot(L1, L2)
# compute the correlation coefficient
cor(L1, L2)
# the notes record a correlation coefficient of 0.9997968, which is
# absurdly close to 1.

# find our regression equation
hold_our_model <- lm(L2 ~ L1)
hold_our_model
# the notes record the equation y = 4.001 + 1.501*x
# plot that line
abline(hold_our_model)
# use the fitted model to predict the value of y when x = 175
# (unname() drops the row label that predict() attaches)
y <- unname(predict(hold_our_model, newdata = data.frame(L1 = 175)))
y
# plot that point
points(175, y, pch = 19, col = "red")

###############################################
# Example 2: the correlation is high but still allows for some
# variability, so we can say that we can use the regression equation to
# get a value within a narrow range for the dependent variable if we
# are given the independent variable.

# generate the values
gnrnd4(641292306, 3140080302, 18000070)
# look at the values
L1
L2
# make a scatter plot of the values
plot(L1, L2)
# compute the correlation coefficient
cor(L1, L2)
# the notes record a correlation coefficient of 0.9795289, which is
# high.  Find the percent of the variation in the data that is
# explained by a linear regression.
cor(L1, L2)^2

# find our regression equation
hold_our_model <- lm(L2 ~ L1)
hold_our_model
# the notes record the equation y = 3.084 + 1.515*x
# plot that line
abline(hold_our_model)
# use the fitted model to predict the value of y when x = 175
y <- unname(predict(hold_our_model, newdata = data.frame(L1 = 175)))
y
# plot that point
points(175, y, pch = 19, col = "red")

###############################################
# Example 3: the correlation is not high, so there is a good deal of
# variability.  Then we can say that we can use the regression equation
# to get a value within a wide range for the dependent variable if we
# are given the independent variable.

# generate the values
gnrnd4(641292306, 6140080302, 18000070)
# look at the values
L1
L2
# make a scatter plot of the values
plot(L1, L2)
# compute the correlation coefficient
cor(L1, L2)
# the notes record a correlation coefficient of 0.8710001, which is
# good, but not high.  Find the percent of the variation in the data
# that is explained by a linear regression.
cor(L1, L2)^2

# find our regression equation
hold_our_model <- lm(L2 ~ L1)
hold_our_model
# the notes record the equation y = 1.551 + 1.541*x
# plot that line
abline(hold_our_model)
# use the fitted model to predict the value of y when x = 175
y <- unname(predict(hold_our_model, newdata = data.frame(L1 = 175)))
y
# plot that point
points(175, y, pch = 19, col = "red")

###############################################
# Example 4: the correlation is so low that there is a huge deal of
# variability.  Then we can say that we can use the regression equation
# but it will not give us a good predicted value for the dependent
# variable if we are given the independent variable.

# generate the values: the first sample is saved as L2 before the
# second call to gnrnd4() replaces L1 with a separately generated sample
gnrnd4(641292301, 43000000)
L2 <- L1
gnrnd4(567472301, 18000070)
# look at the values
L1
L2
# make a scatter plot of the values
plot(L1, L2)
# compute the correlation coefficient
cor(L1, L2)
# the notes record a correlation coefficient of 0.01674324, which is
# terrible.  There is no expectation that the linear regression line
# will tell us anything.

# find our regression equation
hold_our_model <- lm(L2 ~ L1)
hold_our_model
# the notes record the equation y = 199.55392 + 0.04151*x
# plot that line
abline(hold_our_model)
# use the fitted model to predict the value of y when x = 175
y <- unname(predict(hold_our_model, newdata = data.frame(L1 = 175)))
y
# plot that point
points(175, y, pch = 19, col = "red")